# -*- coding:UTF-8 -*-
"""
@Project:   DataCrawler
@FileName:  url_manager.py 
@CreateDate:2023/4/22 23:21  
@Author:    Jia  
@Desc:      URL管理器

'no_crawled':  redis未爬取
'is_crawled'： redis已爬取
"""


class UrlManager:
    """Manage crawl-state URL sets stored in Redis.

    Redis set keys used:
        'no_crawled'         : URLs queued, not yet crawled
        'is_crawled'         : URLs already crawled
        'second_no_crawled'  : second-stage pending URLs
        'second_is_crawled'  : second-stage crawled URLs
        'new_urls'           : inbox of candidate URLs to enqueue
        'fail_urls'          : URLs whose crawl failed
    """

    def __init__(self, rds):
        # rds: an already-connected Redis client instance.
        self.rds = rds

    def add_new_url(self, url):
        """Add *url* to the pending set unless it is already known.

        A URL is skipped when it is None, already queued in 'no_crawled',
        or already present in 'is_crawled' (avoids re-crawling).
        """
        if url is None:
            return
        if not self.rds.sismember('no_crawled', url) and not self.rds.sismember('is_crawled', url):
            self.rds.sadd('no_crawled', url)

    async def add_new_urls(self):
        """Move every URL from the 'new_urls' inbox into the pending set."""
        # scard returns an int (0 for a missing key), so a truthiness
        # check is sufficient; the original `is None` test was dead code.
        if not self.rds.scard('new_urls'):
            return
        for url in self.rds.sscan_iter('new_urls'):
            self.add_new_url(url)

    def add_fail_urls(self, url):
        """Record a URL whose crawl failed in the 'fail_urls' set."""
        self.rds.sadd('fail_urls', url)

    async def get_url(self):
        """Pop a random pending URL, mark it crawled, and return it.

        :return: the popped URL, or None when the pending set is empty.
        """
        url = self.rds.spop('no_crawled')
        # spop returns None on an empty set; sadd(None) would raise
        # redis.DataError, so only record genuinely popped URLs.
        if url is not None:
            self.rds.sadd('is_crawled', url)
        return url

    async def second_get_url(self):
        """Same as get_url, but for the second-stage URL sets.

        :return: the popped URL, or None when the pending set is empty.
        """
        url = self.rds.spop('second_no_crawled')
        if url is not None:
            self.rds.sadd('second_is_crawled', url)
        return url

    def has_no_crawled(self):
        """Return True if 'no_crawled' still contains pending URLs."""
        return self.rds.scard('no_crawled') != 0

    def second_has_crawled(self):
        """Return True if 'second_no_crawled' still contains pending URLs."""
        return self.rds.scard('second_no_crawled') != 0


